# Crime Data from 2010 to Present
# From https://data.lacity.org/A-Safe-City/Crime-Data-from-2010-to-Present/y8tr-7khq/data
#### Heatmap Attempt ####
crime <- read.csv("/Users/charliecarter/Downloads/Crime_Data_from_2010_to_Present.csv",
stringsAsFactors = F)
## Parallel Computing To Derive Latitude and Longitude Quickly ####
library(parallel)
no_cores <- detectCores() - 1
cl <- makeCluster(no_cores)
## get lat
crime$lat <- parLapply(cl, X = crime$Location, fun = function(x) {
as.numeric(
unlist(
strsplit(substr(x, 2, nchar(x) - 1),
split= ", ")[[1]][1]
)
)
})
## get lon
crime$lon <- parLapply(cl, X = crime$Location, fun = function(x) {
as.numeric(
unlist(
strsplit(substr(x, 2, nchar(x) - 1),
split= ", ")[[1]][2]
)
)
})
stopCluster(cl)
crime$lat <- unlist(crime$lat)
crime$lon <- unlist(crime$lon)
#############
## Filter out outlier longitude points
crime <- crime %>%
filter(crime$lon != 0)
## get contour lines that show regions by density
density.poly <- contourLines(
kde2d(crime$lon, crime$lat,
lims = c(
expand_range(range(crime$lon), add = 0.5),
expand_range(range(crime$lat), add = 0.5)
))
)
## create column that subsequent loop will modify
crime$density <- 0
## from highest to lowest density levels (which is default)
## get polygons, assign density val to points within them
for (i in 1:length(density.poly)) {
hold.data <- point.in.polygon(crime$lon, crime$lat,
density.poly[[i]]$x, density.poly[[i]]$y)
crime$density[which(hold.data==1)] <- density.poly[[i]]$level
}
## create year column for animation iteration
crime <- crime %>%
mutate(Date.Occurred = as.Date(Date.Occurred, format = "%m/%d/%Y")) %>%
mutate(year = format(Date.Occurred, "%Y")) %>%
mutate(year = as.integer(year))
## Get Google Map Object from API
ggmap::register_google(key = "AIzaSyBgYVpL1in38KcEnznCFwmRKz2d1VJKSe4")
ggmap_show_api_key()
la.map <- ggmap(get_googlemap("Los Angeles, California"))
#### point with color test ####
crime.density <- la.map +
geom_point(crime, mapping = aes(x = lon, y = lat,
alpha = 0.01,
color = density,
frame = year), size = 0.005, shape = ".") +
scale_color_gradient2(low = "white",
mid = "lightblue",
high = "blue",
midpoint = mean(crime$density, na.rm = T)) +
theme(
legend.position='none'
)
animation::ani.options(interval = 2)
crime.density.anim <- crime.density +
transition_states(crime$year,
transition_length = 10,
state_length = 50) +
labs(title = "Year: {closest_state}")
animate(crime.density.anim, nframes = 50)
Female Crime Reports, By Relative Density |
Male Crime Reports, By Relative Density |
The software I was using on macOS, RMarkdown, does not always recognize path variables when specified in bash_profile. To stitch the images I produced together into gif’s, I had to use ImageMagick from my computer’s Terminal application. Otherwise, everything else was done with this R code.
#### LIBRARIES ####
## DATA MANAGEMENT
library(tidyverse)
library(magrittr)
library(tictoc)
library(MASS)
## MAPS
library(sf)
library(rgdal)
# devtools::install_github("dkahle/ggmap", ref = "tidyup", force=TRUE)
library("ggmap")
library(sp)
library(scales)
#Set your API Key
ggmap::register_google(key = "AIzaSyBgYVpL1in38KcEnznCFwmRKz2d1VJKSe4")
# library(ggiraph)
# library(widgetframe)
## VISUALIZATION
# devtools::install_github('cran/ggplot2')
library(ggplot2)
library(gganimate)
library(gifski)
## install.packages("/Users/charliecarter/Downloads/gifski_0.8.6.tar", repos = NULL,type = "binary")
## trying alternative density measure
## https://slowkow.com/notes/ggplot2-color-by-density/
# Get density of points in 2 dimensions.
# @param x A numeric vector.
# @param y A numeric vector.
# @param n Create a square n by n grid to compute density.
# @return The density within each square.
get_density <- function(x, y, ...) {
dens <- MASS::kde2d(x, y, ...)
ix <- findInterval(x, dens$x)
iy <- findInterval(y, dens$y)
ii <- cbind(ix, iy)
return(dens$z[ii])
}
## Create column with year data
crime <- crime %>%
mutate(Date.Occurred = as.Date(Date.Occurred, format = "%m/%d/%Y")) %>%
mutate(year = format(Date.Occurred, "%Y")) %>%
mutate(year = as.factor(year))
## Create female subset
fem.crime <- crime %>%
filter(Victim.Sex == "F")
## Create male subset
male.crime <- crime %>%
filter(Victim.Sex == "M")
## get density of male and female crime reports in subsets, by year
for(year in levels(fem.crime$year)) {
fem.crime$dens_f[fem.crime$year == year] <-
get_density(fem.crime$lon[fem.crime$year == year],
fem.crime$lat[fem.crime$year == year],
n= 300)
}
for(year in levels(male.crime$year)) {
male.crime$dens_m[male.crime$year == year] <-
get_density(male.crime$lon[male.crime$year == year],
male.crime$lat[male.crime$year == year],
n= 300)
}
###########
# ggmap::register_google(key = "AIzaSyBgYVpL1in38KcEnznCFwmRKz2d1VJKSe4")
# ggmap_show_api_key()
# la.map <- ggmap(get_googlemap("Los Angeles, California"))
setwd("/Users/charliecarter/Documents/Code Resume/Data")
saveRDS(la.map, file = "LA_map")
### FEMALE ####
if(!dir.exists("/Users/charliecarter/examples")) {
dir.create("/Users/charliecarter/examples")
}
if(!dir.exists("/Users/charliecarter/examples/fem_crime")) {
dir.create("/Users/charliecarter/examples/fem_crime")
}
setwd("/Users/charliecarter/examples/fem_crime")
# iterate over years and make map png's
png(file="femcrime%02d.png", width=400, height=400)
for (year in levels(fem.crime$year)){
n <- nrow(fem.crime[fem.crime$year == year,])
print(paste(year, ": ", n))
#### point with color test ####
crime.density <- la.map +
geom_point(fem.crime[fem.crime$year == year,1:ncol(fem.crime)],
mapping = aes(x = lon,
y = lat,
alpha = 0.01,
color = dens_f),
size = 0.005,
shape = ".") +
scale_color_viridis_c(option = "A", alpha = 0.5) +
labs(title = paste("Los Angeles Crime Reports With Female Victims,\nBy Density\nYear: ",year,"\nNumber of Reports: ", n)) +
theme(
legend.position='none'
)
plot(crime.density)
}
dev.off()
### MALE ####
if(!dir.exists("/Users/charliecarter/examples")) {
dir.create("/Users/charliecarter/examples")
}
if(!dir.exists("/Users/charliecarter/examples/male_crime")) {
dir.create("/Users/charliecarter/examples/male_crime")
}
setwd("/Users/charliecarter/examples/male_crime")
# iterate over years and make map png's
png(file="malecrime%02d.png", width=400, height=400)
for (year in levels(male.crime$year)) {
n <- nrow(male.crime[male.crime$year == year,])
print(paste(year, ": ", n))
#### point with color test ####
crime.density <- la.map +
geom_point(male.crime[male.crime$year == year, 1:ncol(male.crime)],
mapping = aes(x = lon,
y = lat,
alpha = 0.01,
color = dens_m),
size = 0.005,
shape = ".") +
scale_color_viridis_c(option = "A", alpha = 0.5) +
labs(title = paste("Los Angeles Crime Reports With Male Victims,\nBy Density\nYear: ",year,"\nNumber of Reports: ", n)) +
theme(
legend.position='none'
)
plot(crime.density)
}
dev.off()
These two gif’s are synchronized using JavaScript code.
<script type="text/javascript">
$(window).load(function() {
$('.preload').attr('src', function(i,a){
$(this).attr('src','').removeClass('preload').attr('src',a);
});
});
</script>
# Data from https://www.understandingsociety.ac.uk
# Read in and join data from 7 different files
library(tidyverse)
library(data.table)
# data.table is faster compared to readr so we'll use it in this case (the function fread()). You need to install this package first to be able to run this code.
# create a vector with the file names and paths
files <- dir(
# Select the folder where the files are stored.
"/Users/charliecarter/Documents/EXETER/MODULES TERM 4/Data Analysis III/priv_data3_2019/data/UKDA-6614-tab/tab",
# Tell R which pattern you want present in the files it will display.
pattern = "indresp",
# We want this process to repeat through the entire folder.
recursive = TRUE,
# And finally want R to show us the entire file path, rather than just
# the names of the individual files.
full.names = TRUE)
# Select only files from the UKHLS.
files <- files[stringr::str_detect(files, "ukhls")]
# files
# create a vector of variable names
vars <- c("memorig", "sex_dv", "age_dv", "vote6")
for (i in 1:7) {
# Create a vector of the variables with the correct prefix.
varsToSelect <- paste(letters[i], vars, sep = "_")
# Add pidp to this vector (no prefix for pidp)
varsToSelect <- c("pidp", varsToSelect)
# Now read the data.
data <- fread(files[i], select = varsToSelect)
if (i == 1) {
all7 <- data
}
else {
all7 <- full_join(all7, data, by = "pidp")
}
# Now we can remove data to free up the memory.
rm(data)
}
## create long version from wide data
Long <- all7 %>%
## gather data into the "very long" format
gather(a_memorig:g_vote6, key = "variable", value = "value") %>%
## split the column with variable names into two (one for wave and one for generic variable name)
separate(variable, into = c("wave", "variable"), sep = "_", extra = "merge") %>%
## convert into the format we need
spread(key = variable, value = value)
# Modify dataset
Long <- Long %>%
# Only use data from Understanding Society Waves
filter(memorig == 1) %>%
# Recode sex variable to "Male" or "Female" character string
mutate(sex_dv = ifelse(sex_dv == 2, "Female",
ifelse(sex_dv == 1, "Male",
NA_character_))) %>%
# recode vote 6 to logical binary
mutate(vote6 = case_when(vote6 < 0 ~ NA_integer_, TRUE ~ vote6))
# create histogram plot of political interest levels
hist.interest <- Long %>%
# filter out NA's
filter(!is.na(sex_dv)) %>%
ggplot() +
# create histogram
geom_histogram(aes(x = vote6, fill=sex_dv)) +
# facet by sex
facet_grid(cols = vars(sex_dv)) +
# adjust graph labels
labs(title = "Comparison of Mean Political Interest, by Gender",
fill = "Respondent Gender",
subtitle = "From Understanding Society: The UK Household Longitudinal Survey") +
# create x axis scale
scale_x_reverse(labels = c("Very", "Fairly", "Not Very", "Not at all"),
name= "How interested would you say you are in politics?") +
# increase panel spacing
theme(panel.spacing = unit(2, "lines"))